#pragma once

#include "../Haar/HaarCoeffCubemap4Triple.hpp"
#include "../SH/SHCoeff.hpp"
#include <Math/Vector3.hpp>

namespace zzz{
template <int HAARN, int SHN, typename T1=float, typename T2=unsigned short >
class HaarSHTransformMatrix
{
public:
  typedef T1 VALUE_TYPE;
  typedef T2 INDEX_TYPE;
  static const int rown_=SHN*SHN;
  static const int coln_=HAARN;
  Vector<3,VALUE_TYPE> value[HAARN][SHN*SHN];
  VALUE_TYPE scale[6][SHN*SHN];
  INDEX_TYPE index[HAARN];
  inline void operator=(const HaarSHTransformMatrix<HAARN,SHN,VALUE_TYPE,INDEX_TYPE> &f)
  {
    memcpy(scale[0],f.scale[0],sizeof(VALUE_TYPE)*SHN*SHN);
    memcpy(scale[1],f.scale[1],sizeof(VALUE_TYPE)*SHN*SHN);
    memcpy(scale[2],f.scale[2],sizeof(VALUE_TYPE)*SHN*SHN);
    memcpy(scale[3],f.scale[3],sizeof(VALUE_TYPE)*SHN*SHN);
    memcpy(scale[4],f.scale[4],sizeof(VALUE_TYPE)*SHN*SHN);
    memcpy(scale[5],f.scale[5],sizeof(VALUE_TYPE)*SHN*SHN);
    for (int i=0; i<HAARN; i++) 
      memcpy(value[i],f.value[i],sizeof(Vector<3,VALUE_TYPE>)*SHN*SHN);
    memcpy(index,f.index,sizeof(INDEX_TYPE)*HAARN);
  }
  struct _sortnode{VALUE_TYPE sum;INDEX_TYPE index;int pos;};
  struct _SumGreater{bool operator()(const _sortnode &x,const _sortnode &y){return x.sum>y.sum;}};
  struct _IndexLess{bool operator()(const _sortnode &x,const _sortnode &y){return x.index<y.index;}};
  inline void operator+=(const HaarSHTransformMatrix<HAARN,SHN,VALUE_TYPE,INDEX_TYPE> &f)
  {
    for (int i=0; i<6; i++) for (int j=0; j<SHN*SHN; j++)
      scale[i][j]+=f.scale[i][j];
    
    Vector<3,VALUE_TYPE> sortdata[HAARN*2][SHN*SHN];
    _sortnode sortsum[HAARN*2];
    int cur=0,cur2=0;
    for (int cur1=0;cur1<HAARN;cur1++) 
    {
      while (cur2<HAARN && index[cur1]>f.index[cur2])
      {
        sortsum[cur].sum=0;
        sortsum[cur].index=f.index[cur2];
        sortsum[cur].pos=cur;
        for (int j=0; j<SHN*SHN; j++)
        {
          sortdata[cur][j]=f.value[cur2][j];
          sortsum[cur].sum+=abs(sortdata[cur][j].AbsMax());
        }
        cur++;cur2++;
      }
      if (cur2<HAARN && index[cur1]==f.index[cur2])
      {
        sortsum[cur].sum=0;
        sortsum[cur].index=index[cur1];
        sortsum[cur].pos=cur;
        for (int j=0; j<SHN*SHN; j++)
        {
          sortdata[cur][j]=value[cur1][j]+f.value[cur2][j];
          sortsum[cur].sum+=abs(sortdata[cur][j].AbsMax());
        }
        cur++;cur2++;
        continue;
      }
      if ((cur2>=HAARN) || (cur2<HAARN && index[cur1]<f.index[cur2]))
      {
        sortsum[cur].sum=0;
        sortsum[cur].index=index[cur1];
        sortsum[cur].pos=cur;
        for (int j=0; j<SHN*SHN; j++)
        {
          sortdata[cur][j]=value[cur1][j];
          sortsum[cur].sum+=abs(sortdata[cur][j].AbsMax());
        }
        cur++;
        continue;
      }
    }
    while (cur2<HAARN)
    {
      sortsum[cur].sum=0;
      sortsum[cur].index=f.index[cur2];
      sortsum[cur].pos=cur;
      for (int j=0; j<SHN*SHN; j++)
      {
        sortdata[cur][j]=f.value[cur2][j];
        sortsum[cur].sum+=abs(sortdata[cur][j].AbsMax());
      }
      cur++;cur2++;
    }
    nth_element(sortsum,sortsum+HAARN,sortsum+cur,_SumGreater());
    std::sort(sortsum,sortsum+HAARN,_IndexLess());
    for (int i=0; i<HAARN; i++) for (int j=0; j<SHN*SHN; j++)
      value[i][j]=sortdata[sortsum[i].pos][j];
    for (int i=0; i<HAARN; i++) 
      index[i]=sortsum[i].index;
  }
  inline void operator*=(const VALUE_TYPE f)
  {
    for (int i=0; i<6; i++) for (int j=0; j<SHN*SHN; j++)
      scale[i][j]*=f;
    for (int i=0; i<HAARN; i++) for (int j=0; j<SHN*SHN; j++)
      value[i][j]*=f;
  }
  inline void operator/=(const VALUE_TYPE f)
  {
    for (int i=0; i<6; i++) for (int j=0; j<SHN*SHN; j++)
      scale[i][j]/=f;
    for (int i=0; i<HAARN; i++) for (int j=0; j<SHN*SHN; j++)
      value[i][j]/=f;
  }
  inline void Zero()
  {
    memset(scale[0],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[1],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[2],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[3],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[4],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[5],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(index,0,sizeof(INDEX_TYPE)*HAARN);
    for (int i=0; i<HAARN; i++)
      memset(value[i],0,sizeof(Vector<3,VALUE_TYPE>)*SHN*SHN);
  }
  inline Vector<3,VALUE_TYPE> &At(int haar,int sh)
  {
    return value[haar][sh];
  }
  inline const Vector<3,VALUE_TYPE> &At(int haar,int sh) const
  {
    return value[haar,sh];
  }
  inline INDEX_TYPE& Index(int n)
  {
    return index[n];
  }
  inline const INDEX_TYPE& Index(int n) const
  {
    return index[n];
  }

  inline void sort()
  {
    struct _sort
    {
      INDEX_TYPE index;
      int pos;
      bool operator<(const _sort &c) const{return index<c.index;}
    }sortindex[HAARN];
    for (int i=0; i<HAARN; i++)
    {
      sortindex[i].index=index[i];
      sortindex[i].pos=i;
    }
    sort(sortindex,sortindex+HAARN);
    Vector<3,VALUE_TYPE> tmp[HAARN][SHN*SHN];
    for (int i=0; i<HAARN; i++)
      memcpy(tmp[i],value[i],sizeof(Vector<3,VALUE_TYPE>)*SHN*SHN);
    for (int i=0; i<HAARN; i++) for (int j=0; j<SHN*SHN; j++)
      value[i][j]=tmp[sortindex[i].pos][j];
    for (int i=0; i<HAARN; i++)
      index[i]=sortindex[i];
  }
  template <int CUBEFACEN>
  inline void Multiply(const HaarCoeffCubemap4TripleSparse<CUBEFACEN,VALUE_TYPE,INDEX_TYPE> &h, SHCoeff<SHN,VALUE_TYPE> &res)
  {
    res.Zero();
    for (int j=0; j<SHN*SHN; j++)
      for (int i=0; i<6; i++)
      {
        res.v[j]+=scale[i][j]*h.Scale[i];
      }
    int cur2=0;
    for(int cur1=0;cur1<HAARN;cur1++)
    {
      while(cur2<h.Size && h.Coeff[cur2].index<index[cur1]) cur2++;
      if (cur2==h.Size) break;
      if (h.Coeff[cur2].index==index[cur1])
      {
        for (int i=0; i<SHN*SHN; i++)
          res.v[i]+=value[cur1][i].Dot(h.Coeff[cur2].value);
      }
    }
  }
  template <int CUBEFACEN>
  inline void Multiply(const HaarCoeffCubemap4TripleSparse<CUBEFACEN,VALUE_TYPE,INDEX_TYPE> &h, SHCoeff4f_SSE &res)
  {
    res.Zero();
    for (int j=0; j<SHN*SHN; j++)
      for (int i=0; i<6; i++)
      {
        res.v[j]+=scale[i][j]*h.Scale[i];
      }
    for(int cur1=0,cur2=0;cur1<HAARN;cur1++)
    {
#ifdef SPARSENEW
      int idx=index[cur1];
      int pos=h.IndexToLocal[idx];
      if (pos==0) continue;
      else
      {
        for (int i=0; i<SHN*SHN; i++)
          res.v[i]+=value[cur1][i].Dot(h.Coeff[pos].value);
      }
#else
      while(cur2<h.Size && h.Coeff[cur2].index<index[cur1]) cur2++;
      if (cur2==h.Size) break;
      if (h.Coeff[cur2].index==index[cur1])
      {
        for (int i=0; i<SHN*SHN; i++)
          res.v[i]+=value[cur1][i].Dot(h.Coeff[cur2].value);
      }
#endif
    }
  }
  template <int CUBEFACEN,int KEEPNUM>
  inline void Multiply(const HaarCoeffCubemap4TripleSparseStatic<CUBEFACEN,KEEPNUM,VALUE_TYPE,INDEX_TYPE> &h, SHCoeff4f_SSE &res)
  {
    res.Zero();
    for (int j=0; j<SHN*SHN; j++)
      for (int i=0; i<6; i++)
      {
        res.v[j]+=scale[i][j]*h.Scale[i];
      }
    int cur2=0;
    for(int cur1=0;cur1<HAARN;cur1++)
    {
      while(cur2<KEEPNUM && h.Coeff[cur2].index<index[cur1]) cur2++;
      if (cur2==KEEPNUM) break;
      if (h.Coeff[cur2].index==index[cur1])
      {
        for (int i=0; i<SHN*SHN; i++)
          res.v[i]+=value[cur1][i].Dot(h.Coeff[cur2].value);
      }
    }
  }
  template <int CUBEFACEN>
  inline void Multiply(const HaarCoeffCubemap4TripleFull<CUBEFACEN,VALUE_TYPE,INDEX_TYPE> &h, SHCoeff<SHN,VALUE_TYPE> &res)
  {
    res.Zero();
    for (int j=0; j<SHN*SHN; j++) for (int i=0; i<6; i++)
    {
      res.v[j]+=scale[i][j]*h.Scale[i];
    }
    for(int cur1=0;cur1<HAARN;cur1++)
    {
      for (int i=0; i<SHN*SHN; i++)
        res.v[i]+=value[cur1][i].Dot(h.Square[index[cur1]]);
    }
  }
  template <int CUBEFACEN>
  inline void Multiply(const HaarCoeffCubemap4TripleFull<CUBEFACEN,VALUE_TYPE,INDEX_TYPE> &h, SHCoeff4f_SSE &res)
  {
    res.Zero();
    for (int j=0; j<SHN*SHN; j++) for (int i=0; i<6; i++)
    {
      res.v[j]+=scale[i][j]*h.Scale[i];
    }
    for(int cur1=0;cur1<HAARN;cur1++)
    {
      for (int i=0; i<SHN*SHN; i++)
        res.v[i]+=value[cur1][i].Dot(h.Square[index[cur1]]);
    }
  }
};

template <int SHN, typename T1=float, typename T2=unsigned short >
class HaarSHTransformMatrixDynamic
{
public:
  typedef T1 VALUE_TYPE;
  typedef T2 INDEX_TYPE;
  Vector<3,VALUE_TYPE> **value;
  VALUE_TYPE scale[6][SHN*SHN];
  INDEX_TYPE *index;
  int Size;

  HaarSHTransformMatrixDynamic()
  {
    Size=0;
    value=NULL;
    index=NULL;
    for (int i=0; i<6; i++)
      memset(scale[i],0,sizeof(VALUE_TYPE)*SHN*SHN);
  }
  ~HaarSHTransformMatrixDynamic()
  {
    Clear();
  }
  inline void Create(int n)
  {
    if (Size==n) return;
    if (value)
      for (int i=0; i<Size; i++)
        delete[] value[i];
    delete[] value;
    value=NULL;
    delete[] index;
    index=NULL;
    Size=n;
    value=new Vector<3,VALUE_TYPE>*[Size];
    for (int i=0; i<Size; i++)
      value[i]=new Vector<3,VALUE_TYPE>[SHN*SHN];
    index=new INDEX_TYPE[Size];
  }
  inline void Clear()
  {
    memset(scale[0],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[1],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[2],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[3],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[4],0,sizeof(VALUE_TYPE)*SHN*SHN);
    memset(scale[5],0,sizeof(VALUE_TYPE)*SHN*SHN);
    if (value)
      for (int i=0; i<Size; i++)
        delete[] value[i];
    delete[] value;
    value=NULL;
    delete[] index;
    index=NULL;
    Size=0;
  }
  template <int HAARN>
  inline void operator=(const HaarSHTransformMatrix<HAARN,SHN,VALUE_TYPE,INDEX_TYPE> &f)
  {
    Clear();
    for (int i=0; i<6; i++) 
      memcpy(scale[i],f.scale[i],sizeof(VALUE_TYPE)*SHN*SHN);
    Size=HAARN;
    value=new Vector<3,VALUE_TYPE>*[Size];
    for (int i=0; i<Size; i++)
      value[i]=new Vector<3,VALUE_TYPE>[SHN*SHN];
    index=new INDEX_TYPE[Size];
    for (int i=0; i<Size; i++)
    {
      memcpy(value[i],f.value[i],sizeof(Vector<3,VALUE_TYPE>)*SHN*SHN);
    }
    memcpy(index,f.index,sizeof(INDEX_TYPE)*Size);
  }

  template <int HAARN>
  inline void operator+=(const HaarSHTransformMatrix<HAARN,SHN,VALUE_TYPE,INDEX_TYPE> &f)
  {
    for (int i=0; i<6; i++) for (int j=0; j<SHN*SHN; j++)
      scale[i][j]+=f.scale[i][j];
    
    Vector<3,VALUE_TYPE> **sortdata;
    sortdata=new Vector<3,VALUE_TYPE>*[HAARN+Size];
    for (int i=0; i<HAARN+Size; i++)
      sortdata[i]=new Vector<3,VALUE_TYPE>[SHN*SHN];
    INDEX_TYPE *sortindex=new INDEX_TYPE[HAARN+Size];
    int cur=0,cur2=0;
    for (int cur1=0;cur1<Size;cur1++) 
    {
      while (cur2<HAARN && index[cur1]>f.index[cur2])
      {
        for (int j=0; j<SHN*SHN; j++)
          sortdata[cur][j]=f.value[cur2][j];
        sortindex[cur]=f.index[cur2];
        cur++;cur2++;
      }
      if (cur2<HAARN && index[cur1]==f.index[cur2])
      {
        for (int j=0; j<SHN*SHN; j++)
          sortdata[cur][j]=value[cur1][j]+f.value[cur2][j];
        sortindex[cur]=index[cur1];
        cur++;cur2++;
        continue;
      }
      if ((cur2>=HAARN) || (cur2<HAARN && index[cur1]<f.index[cur2]))
      {
        for (int j=0; j<SHN*SHN; j++)
          sortdata[cur][j]=value[cur1][j];
        sortindex[cur]=index[cur1];
        cur++;
        continue;
      }
    }
    for (int i=0; i<Size; i++)
      delete[] value[i];
    delete[] value;
    value=sortdata;
    Size=cur;
    delete[] index;
    index=sortindex;
  }

  inline void Zero()
  {
    Clear();
  }

  template <int CUBEFACEN>
  inline void Multiply(const HaarCoeffCubemap4TripleSparse<CUBEFACEN,VALUE_TYPE,INDEX_TYPE> &h, SHCoeff<SHN,VALUE_TYPE> &res)
  {
    res.Zero();
    for (int j=0; j<SHN*SHN; j++)
      for (int i=0; i<6; i++)
      {
        res.v[j]+=scale[i][j]*h.Scale[i];
      }
    for(int cur1=0,cur2=0;cur1<Size;cur1++)
    {
      while(cur2<h.Size && h.Coeff[cur2].index<index[cur1]) cur2++;
      if (cur2==h.Size) break;
      if (h.Coeff[cur2].index==index[cur1])
      {
        for (int i=0; i<SHN*SHN; i++)
          res.v[i]+=value[cur1][i].Dot(h.Coeff[cur2].value);
      }
    }
  }

  template <int CUBEFACEN>
  inline void Multiply(const HaarCoeffCubemap4TripleSparse<CUBEFACEN,VALUE_TYPE,INDEX_TYPE> &h, SHCoeff4f_SSE &res)
  {
    res.Zero();
    for (int j=0; j<SHN*SHN; j++)
      for (int i=0; i<6; i++)
      {
        res.v[j]+=scale[i][j]*h.Scale[i];
      }
    for(int cur1=0,cur2=0;cur1<Size;cur1++)
    {
      while(cur2<h.Size && h.Coeff[cur2].index<index[cur1]) cur2++;
      if (cur2==h.Size) break;
      if (h.Coeff[cur2].index==index[cur1])
      {
        for (int i=0; i<SHN*SHN; i++)
          res.v[i]+=value[cur1][i].Dot(h.Coeff[cur2].value);
      }
    }
  }
};
}